# -*- coding:utf-8 -*-


from time import sleep

import bs4
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

from entity.IndexUrl import IndexUrl
class ColumnCrawl:
    """Crawl a column listing page and collect the article URLs it links to."""

    # How many times to click the "load more" button so extra items render.
    _LOAD_MORE_CLICKS = 2
    # Seconds to wait after each click for the newly loaded items to appear.
    # NOTE(review): a fixed sleep is fragile — an explicit WebDriverWait on
    # the new elements would be more robust; kept as a sleep to preserve
    # the original timing behavior.
    _CLICK_WAIT = 0.5

    def getUrls(self, columnUrl):
        """Return the list of article ``href`` values found on *columnUrl*.

        Loads the page in headless Chrome, clicks the ``dataMoreBtn``
        "load more" button twice to expand the listing, then parses the
        rendered HTML for anchors under ``#showData0 .clearfix h3``.

        :param columnUrl: URL of the column page to crawl.
        :return: list of href strings (may be empty if nothing matched).
        """
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-gpu')
        # `options=` (not the removed `chrome_options=`) works on both
        # Selenium 3.8+ and Selenium 4.
        browser = webdriver.Chrome(options=chrome_options)
        try:
            browser.get(columnUrl)
            for _ in range(self._LOAD_MORE_CLICKS):
                # `find_element(By.ID, ...)` replaces the Selenium-4-removed
                # `find_element_by_id`.
                browser.find_element(By.ID, "dataMoreBtn").click()
                sleep(self._CLICK_WAIT)
            page = browser.page_source
        finally:
            # quit() (not close()) tears down the whole driver session, so
            # the chromedriver process is not leaked even when an exception
            # occurs mid-crawl.
            browser.quit()

        soup = bs4.BeautifulSoup(page, 'html5lib')
        return [a['href'] for a in soup.select('#showData0 .clearfix h3 a')]

